# -------------------------------------------------------------------
# Purpose: Creates Table B14
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate the table.
# -------------------------------------------------------------------
# Check that required paths exist
stopifnot(dir.exists(pdataconfanalysis))
stopifnot(dir.exists(poutputappendix))


# Load data
load(file.path(pdataconfanalysis, "surnameCountyLevel19001940.RData"))
d <- copy(surnameCountyLevel19001940)

d[, div := "Northeast"]
d[data.table::between(region, 3, 4), div := "Midwest"]
d[data.table::between(region, 5, 7), div := "South"]
d[data.table::between(region, 8, 9), div := "West"]
d[, div_f := 2]
d[div == "Midwest", div_f := 1]
d[div == "South", div_f := 3]
d[div == "West", div_f := 4]



# Regressions
o <- list()
o <- append(o, list(feols(sum_patents_pc_1900_f_w ~ entropy_namelast_mp_adjp_ws:i(div) + sum_n_namelast_mp_adjp_ws:i(div) | gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year, d, weights = d$wgt_name)))
o <- append(o, list(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ entropy_namelast_mp_adjp_ws:i(div) + sum_n_namelast_mp_adjp_ws:i(div) | gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year, d, weights = d$wgt_name)))

r <- list()
r <- append(r, list(feols(sum_patents_pc_1900_f_w ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws:i(div) + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws:i(div) | gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year, d, weights = d$wgt_name)))
r <- append(r, list(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws:i(div) + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws:i(div) | gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year, d, weights = d$wgt_name)))

i <- list()
i <- append(i, list(feols(sum_patents_pc_1900_f_w ~ 1 | gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year | entropy_namelast_mp_adjp_ws:i(div) + sum_n_namelast_mp_adjp_ws:i(div) ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws:i(div) + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws:i(div), d, weights = d$wgt_name)))
i <- append(i, list(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ 1 | gisjoin_1900^namelast_mp + namelast_mp^year + statefip^year | entropy_namelast_mp_adjp_ws:i(div) + sum_n_namelast_mp_adjp_ws:i(div) ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws:i(div) + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws:i(div), d, weights = d$wgt_name)))

# F-statistics
dstata <- d[, .(
  x1 = entropy_namelast_mp_adjp_ws, z1 = iv_lo_entropy_namelast_mp_adjp_fe_immig_ws,
  x2 = sum_n_namelast_mp_adjp_ws, z2 = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws,
  y1 = sum_patents_pc_1900_f_w, y2 = sum_break_p80_rrfsim05_pc_1900_f_w,
  gisjoin_1900_f, statefip_f, year_f, year_num, namelast_mp_f, wgt_name, div_f
)]
dstata[, x1_div_f_1 := x1 * (div_f == 1)]
dstata[, x1_div_f_2 := x1 * (div_f == 2)]
dstata[, x1_div_f_3 := x1 * (div_f == 3)]
dstata[, x1_div_f_4 := x1 * (div_f == 4)]
dstata[, x2_div_f_1 := x2 * (div_f == 1)]
dstata[, x2_div_f_2 := x2 * (div_f == 2)]
dstata[, x2_div_f_3 := x2 * (div_f == 3)]
dstata[, x2_div_f_4 := x2 * (div_f == 4)]
dstata[, z1_div_f_1 := z1 * (div_f == 1)]
dstata[, z1_div_f_2 := z1 * (div_f == 2)]
dstata[, z1_div_f_3 := z1 * (div_f == 3)]
dstata[, z1_div_f_4 := z1 * (div_f == 4)]
dstata[, z2_div_f_1 := z2 * (div_f == 1)]
dstata[, z2_div_f_2 := z2 * (div_f == 2)]
dstata[, z2_div_f_3 := z2 * (div_f == 3)]
dstata[, z2_div_f_4 := z2 * (div_f == 4)]
commands <- list(
  'ivreghdfe y1 (x1_div_f_1 x1_div_f_2 x1_div_f_3 x1_div_f_4  x2_div_f_1 x2_div_f_2 x2_div_f_3 x2_div_f_4 = z1_div_f_1 z1_div_f_2 z1_div_f_3 z1_div_f_4 z2_div_f_1 z2_div_f_2 z2_div_f_3 z2_div_f_4) [pw = wgt_name], absorb(gisjoin_1900_f#namelast_mp_f statefip_f#year_f year_f#namelast_mp_f) cluster(statefip_f) first ffirst savefirst
    gen swf1 = .
    gen swf2 = .
    gen swf3 = .
    gen swf4 = .
    gen swf5 = .
    gen swf6 = .
    gen swf7 = .
    gen swf8 = .
    replace swf1 = round(e(first)["SWF",1])
    replace swf2 = round(e(first)["SWF",2])
    replace swf3 = round(e(first)["SWF",3])
    replace swf4 = round(e(first)["SWF",4])
    replace swf5 = round(e(first)["SWF",5])
    replace swf6 = round(e(first)["SWF",6])
    replace swf7 = round(e(first)["SWF",7])
    replace swf8 = round(e(first)["SWF",8])
   keep swf*
   keep if _n == 1'
)
swf_results <- as.character(unlist(lapply(commands, get_fstat_from_stata, data.in = dstata)))
swfstat <- swf_results %>% str_split("; ", simplify = TRUE)
swfstat_diversity <- c(paste(swfstat[1, 1:4], collapse = "; "))
swfstat_population <- c(paste(swfstat[1, 5:8], collapse = "; "))
swfstat_diversity <- rep(swfstat_diversity, times = 2)
swfstat_population <- rep(swfstat_population, times = 2)

# Create table
setFixest_dict(
  c(
    entropy_namelast_mp_adjp_ws = "Surname diversity",
    iv_lo_entropy_namelast_mp_adjp_fe_immig_ws = "Predicted surname diversity",
    sum_n_namelast_mp_adjp_ws = "Population",
    iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws = "Predicted population",
    div = "Region",
    sum_patents_pc_1900_f_w = "\\makecell{Patents \\\\ per 1,000 people}", sum_break_p80_rrfsim05_pc_1900_f_w = "\\makecell{Breakthrough patents \\\\ per 1,000 people}",
    year = "Period", statefip = "State", namelast_mp = "Surname", gisjoin_1900 = "County"
  )
)

tablename <- file.path(poutputappendix, "tableB14.tex")
etable(o,
  cluster = ~statefip,
  fitstat = ~n,
  digits = "r3", digits.stats = "r3",
  file = tablename, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)
add_table_row(tablename, "\\midrule", "\\multicolumn{2}{l}{\\textit{Panel A: Least-squares estimates}} &  \\multicolumn{1}{c}{}\\\\ \\cmidrule(lr){1-3}")
add_table_row(tablename, "Patents", "\\cmidrule(lr){2-2}  \\cmidrule(lr){3-3}")
move_table_row(tablename, "Observations", "bottomrule")
add_table_row(tablename, "    \\\\", c("\\multicolumn{2}{l}{\\textit{Panel B: Reduced-form estimates}} &  \\multicolumn{1}{c}{}\\\\", "\\\\", "\\multicolumn{2}{l}{\\textit{Panel C: Instrumental-variable estimates}} &  \\multicolumn{1}{c}{}\\\\"))

temptable <- file.path(poutputappendix, "temp.tex")
etable(r,
  cluster = ~statefip,
  fitstat = ~n,
  digits = "r3", digits.stats = "r3",
  file = temptable, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)
estimates_rows <- get_estimates_rows(temptable)
add_table_row(tablename, "Panel B", c("\\cmidrule(lr){1-3}", estimates_rows))

temptable <- file.path(poutputappendix, "temp.tex")
etable(i,
  cluster = ~statefip,
  fitstat = ~n,
  digits = "r3", digits.stats = "r3",
  extralines = list(
    "Sanderson-Windmeijer \\textit{F}-stat: Surname diversity" = swfstat_diversity,
    "Sanderson-Windmeijer \\textit{F}-stat: Population" = swfstat_population
  ),
  file = temptable, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)
estimates_rows <- get_estimates_rows(temptable)
add_table_row(tablename, "Panel C", c("\\cmidrule(lr){1-3}", estimates_rows))
add_table_row(tablename, "Sanderson", "\\\\", "before")
edit_table_content_fixed(tablename, "entropy\\_namelast\\_mp\\_adjp\\_ws:div::Midwest", "Surname diversity $\\times$ Region $=$ Midwest")
edit_table_content_fixed(tablename, "entropy\\_namelast\\_mp\\_adjp\\_ws:div::Northeast", "Surname diversity $\\times$ Region $=$ Northeast")
edit_table_content_fixed(tablename, "entropy\\_namelast\\_mp\\_adjp\\_ws:div::South", "Surname diversity $\\times$ Region $=$ South")
edit_table_content_fixed(tablename, "entropy\\_namelast\\_mp\\_adjp\\_ws:div::West", "Surname diversity $\\times$ Region $=$ West")
edit_table_content_fixed(tablename, "div::Midwest:sum\\_n\\_namelast\\_mp\\_adjp\\_ws", "Population $\\times$ Region $=$ Midwest")
edit_table_content_fixed(tablename, "div::Northeast:sum\\_n\\_namelast\\_mp\\_adjp\\_ws", "Population $\\times$ Region $=$ Northeast")
edit_table_content_fixed(tablename, "div::South:sum\\_n\\_namelast\\_mp\\_adjp\\_ws", "Population $\\times$ Region $=$ South")
edit_table_content_fixed(tablename, "div::West:sum\\_n\\_namelast\\_mp\\_adjp\\_ws", "Population $\\times$ Region $=$ West")
file.remove(temptable)

cat("Table B14 saved to:", tablename, "\n")